COVID-19 Analysis, Visualization & Comparisons

In [1]:
#Install libraries
#!pip install --upgrade pip
#!pip install calmap
#!pip install us
#!pip install pycountry_convert
#!pip install plotly==4.9.0  # version this notebook was run with; plotly.express requires plotly >= 4
#!pip install cufflinks
In [2]:
!pip install psutil
!pip install chart-studio==1.0.0
Requirement already satisfied: psutil in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (5.6.7)
Requirement already satisfied: chart-studio==1.0.0 in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (1.0.0)
Requirement already satisfied: six in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from chart-studio==1.0.0) (1.14.0)
Requirement already satisfied: plotly in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from chart-studio==1.0.0) (4.9.0)
Requirement already satisfied: retrying>=1.3.3 in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from chart-studio==1.0.0) (1.3.3)
Requirement already satisfied: requests in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from chart-studio==1.0.0) (2.22.0)
Requirement already satisfied: idna<2.9,>=2.5 in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from requests->chart-studio==1.0.0) (2.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from requests->chart-studio==1.0.0) (1.25.8)
Requirement already satisfied: certifi>=2017.4.17 in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from requests->chart-studio==1.0.0) (2019.11.28)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/useonsong/opt/anaconda3/lib/python3.7/site-packages (from requests->chart-studio==1.0.0) (3.0.4)
In [3]:
#Load libraries
import json
from datetime import timedelta
from urllib.request import urlopen

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Bare expression: it is not the last statement of the cell, so nothing is displayed.
pio.renderers
# Make the static "svg" renderer the default used by fig.show().
pio.renderers.default = "svg"


# NOTE(review): plot and iplot are imported but not used in the visible cells.
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected = True)

import warnings 
# Silences every warning for the rest of the session (including pandas
# deprecation and SettingWithCopy warnings raised by later cells).
warnings.filterwarnings('ignore')
In [4]:
# Color palette used by the charts below.
# dth / rec / act are used for deaths / recovered / active series;
# cnf is presumably meant for confirmed cases (not used in the visible cells).
cnf = '#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'
In [5]:
#full data
# Per the preview below: one row per province/country and date, with
# coordinates, Confirmed/Deaths/Recovered/Active counts and the WHO region.
# 'Date' is not parsed here, so it stays a string column in this frame.
full_table = pd.read_csv('../data/covid_19_clean_complete.csv')
# Show the first rows as a quick sanity check of the load.
full_table.head()
Out[5]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active WHO Region
0 NaN Afghanistan 33.93911 67.709953 2020-01-22 0 0 0 0 Eastern Mediterranean
1 NaN Albania 41.15330 20.168300 2020-01-22 0 0 0 0 Europe
2 NaN Algeria 28.03390 1.659600 2020-01-22 0 0 0 0 Africa
3 NaN Andorra 42.50630 1.521800 2020-01-22 0 0 0 0 Europe
4 NaN Angola -11.20270 17.873900 2020-01-22 0 0 0 0 Africa
In [6]:
# Counts grouped by day and country; 'Date' is converted to datetime on load.

full_grouped = (
    pd.read_csv('../data/full_grouped.csv')
      .assign(Date = lambda frame: pd.to_datetime(frame['Date']))
)

full_grouped.head()
Out[6]:
Date Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered WHO Region
0 2020-01-22 Afghanistan 0 0 0 0 0 0 0 Eastern Mediterranean
1 2020-01-22 Albania 0 0 0 0 0 0 0 Europe
2 2020-01-22 Algeria 0 0 0 0 0 0 0 Africa
3 2020-01-22 Andorra 0 0 0 0 0 0 0 Europe
4 2020-01-22 Angola 0 0 0 0 0 0 0 Africa
In [7]:
# Worldwide totals per day; 'Date' is converted to datetime on load.

day_wise = (
    pd.read_csv('../data/datasets_494766_1376807_day_wise.csv')
      .assign(Date = lambda frame: pd.to_datetime(frame['Date']))
)

day_wise.head()
Out[7]:
Date Confirmed Deaths Recovered Active New cases New deaths New recovered Deaths / 100 Cases Recovered / 100 Cases Deaths / 100 Recovered No. of countries
0 2020-01-22 555 17 28 510 0 0 0 3.06 5.05 60.71 6
1 2020-01-23 654 18 30 606 99 1 2 2.75 4.59 60.00 8
2 2020-01-24 941 26 36 879 287 8 6 2.76 3.83 72.22 9
3 2020-01-25 1434 42 39 1353 493 16 3 2.93 2.72 107.69 11
4 2020-01-26 2118 56 52 2010 684 14 13 2.64 2.46 107.69 13
In [8]:
# Latest figures per country; empty strings and missing values become 0.

country_wise = (
    pd.read_csv('../data/datasets_494766_1376807_country_wise_latest.csv')
      .replace('', np.nan)
      .fillna(0)
)

country_wise.head()
Out[8]:
Country/Region Confirmed Deaths Recovered Active New cases New deaths New recovered Deaths / 100 Cases Recovered / 100 Cases Deaths / 100 Recovered Confirmed last week 1 week change 1 week % increase WHO Region
0 Afghanistan 36263 1269 25198 9796 106 10 18 3.50 69.49 5.04 35526 737 2.07 Eastern Mediterranean
1 Albania 4880 144 2745 1991 117 6 63 2.95 56.25 5.25 4171 709 17.00 Europe
2 Algeria 27973 1163 18837 7973 616 8 749 4.16 67.34 6.17 23691 4282 18.07 Africa
3 Andorra 907 52 803 52 10 0 0 5.73 88.53 6.48 884 23 2.60 Europe
4 Angola 950 41 242 667 18 1 0 4.32 25.47 16.94 749 201 26.84 Africa
In [9]:
# Worldometer data per country; empty strings and missing values become 0
# (this applies to every column, text columns included).

worldmeter_data = (
    pd.read_csv('../data/datasets_494766_1376807_worldometer_data.csv')
      .replace('', np.nan)
      .fillna(0)
)

worldmeter_data.head()
Out[9]:
Country/Region Continent Population TotalCases NewCases TotalDeaths NewDeaths TotalRecovered NewRecovered ActiveCases Serious,Critical Tot Cases/1M pop Deaths/1M pop TotalTests Tests/1M pop WHO Region
0 USA North America 3.311452e+08 4433410 0.0 150444.0 0.0 2136603.0 0.0 2146363.0 19100.0 13388.0 454.0 55018236.0 166145.0 Americas
1 Brazil South America 2.126695e+08 2446397 2917.0 87737.0 58.0 1667667.0 0.0 690993.0 8318.0 11503.0 413.0 12601096.0 59252.0 Americas
2 India Asia 1.380975e+09 1484136 1633.0 33461.0 13.0 954004.0 815.0 496671.0 8944.0 1075.0 24.0 17334885.0 12553.0 South-EastAsia
3 Russia Europe 1.459392e+08 818120 0.0 13354.0 0.0 603329.0 0.0 201437.0 2300.0 5606.0 92.0 27141966.0 185981.0 Europe
4 South Africa Africa 5.936114e+07 452529 0.0 7067.0 0.0 274925.0 0.0 170537.0 539.0 7623.0 119.0 2802211.0 47206.0 Africa
In [10]:
# Latest day of the worldwide series, reshaped to one row per case type.

temp = (
    day_wise
    .tail(1)
    .melt(id_vars = 'Date', value_vars = ['Active', 'Deaths', 'Recovered'])
)
temp
Out[10]:
Date variable value
0 2020-07-27 Active 6358362
1 2020-07-27 Deaths 654036
2 2020-07-27 Recovered 9468087
In [11]:
# Treemap of the latest Active / Deaths / Recovered totals.
fig = px.treemap(
    temp,
    path = ['variable'],
    values = 'value',
    height = 225,
    color_discrete_sequence = [act, rec, dth],
)
# Show the label and the value inside each tile (the figure has a single trace).
fig.update_traces(textinfo = 'label+text+value')
fig.show()
In [12]:
#Maps

def plot_map(df, col, pal):
    """Show a world choropleth of `col`, using only rows where `col` is positive.

    df  : frame with a 'Country/Region' column (matched by country name)
          and the column named by `col`.
    col : column used for the color, the hover data and the figure title.
    pal : passed to px.choropleth as `color_continuous_scale`.
    """
    positive_rows = df.loc[df[col] > 0]
    world_map = px.choropleth(
        positive_rows,
        locations = 'Country/Region',
        locationmode = 'country names',
        color = col,
        hover_name = 'Country/Region',
        title = col,
        hover_data = [col],
        color_continuous_scale = pal,
    )
    world_map.show()
In [13]:
# World map of confirmed cases per country.
plot_map(df = country_wise, col = 'Confirmed', pal = 'matter')
In [14]:
# World map of deaths per country.
plot_map(df = country_wise, col = 'Deaths', pal = 'matter')
In [15]:
# World map of deaths per 100 confirmed cases.
plot_map(df = country_wise, col = 'Deaths / 100 Cases', pal = 'matter')
In [16]:
# Cases over time: animated world map, one frame per date, colored by
# log(confirmed cases).

# log(0) is -inf, and the resulting warning is hidden by the global warnings
# filter. Mask zero counts to NaN first so "no cases yet" is an explicit
# missing value instead of an infinite color value.
fig = px.choropleth(full_grouped, locations = 'Country/Region',
                   color=np.log(full_grouped['Confirmed'].where(full_grouped['Confirmed'] > 0)),
                   locationmode='country names', hover_name='Country/Region',
                   animation_frame=full_grouped['Date'].dt.strftime('%Y-%m-%d'),
                   title='Cases over time', color_continuous_scale=px.colors.sequential.matter)
# The color bar would show log values, which are not meaningful to read; hide it.
fig.update(layout_coloraxis_showscale=False)
fig.show()
In [17]:
#Cases over time

def plot_daywise(col, hue):
    """Show a bar chart of the `day_wise` column `col` against 'Date'.

    col : column of `day_wise` to plot; also used as the chart title.
    hue : single color used for all bars.
    """
    bars = px.bar(
        day_wise,
        x = 'Date',
        y = col,
        width = 700,
        color_discrete_sequence = [hue],
    )
    # The title already names the series, so both axis titles are blanked.
    bars.update_layout(title = col, xaxis_title = '', yaxis_title = '')
    bars.show()
In [18]:
def plot_daywise_line(col, hue):
    """Show a line chart of the `day_wise` column `col` against 'Date'.

    col : column of `day_wise` to plot; also used as the chart title.
    hue : color of the line.
    """
    line = px.line(
        day_wise,
        x = 'Date',
        y = col,
        width = 700,
        color_discrete_sequence = [hue],
    )
    # The title already names the series, so both axis titles are blanked.
    line.update_layout(title = col, xaxis_title = '', yaxis_title = '')
    line.show()
In [19]:
# Worldwide Recovered / Deaths / Active totals per day, in long format.
# The columns are selected with a list: indexing a groupby with a bare tuple
# of names (['a', 'b', 'c'] without the inner brackets) is deprecated in
# pandas 1.0 and raises in pandas 2.x.
temp = full_grouped.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
temp = temp.melt(id_vars = 'Date', value_vars = ['Recovered', 'Deaths', 'Active'],
                var_name = 'Case', value_name = 'Count')

# Area chart with a range slider under the x axis; colors are listed in the
# same order as value_vars above (Recovered, Deaths, Active).
fig = px.area(temp, x = 'Date', y = 'Count', color = 'Case', height = 600, width = 700,
             title = 'Cases over time', color_discrete_sequence = [rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible = True)
fig.show()
In [20]:
# Worldwide confirmed cases per day.
plot_daywise(col = 'Confirmed', hue = '#333333')
In [21]:
# Worldwide active cases per day.
plot_daywise(col = 'Active', hue = '#333333')
In [22]:
# Worldwide new cases per day.
plot_daywise(col = 'New cases', hue = '#333333')
In [23]:
# Worldwide deaths per day.
plot_daywise(col = 'Deaths', hue = dth)
In [24]:
# Worldwide new deaths per day.
plot_daywise(col = 'New deaths', hue = dth)
In [25]:
# Worldwide recovered cases per day.
plot_daywise(col = 'Recovered', hue = rec)
In [26]:
# Worldwide newly recovered cases per day.
plot_daywise(col = 'New recovered', hue = rec)
In [27]:
# Deaths per 100 confirmed cases over time.
plot_daywise_line(col = 'Deaths / 100 Cases', hue = dth)
In [28]:
# Deaths per 100 recovered cases over time.
plot_daywise_line(col = 'Deaths / 100 Recovered', hue = dth)
In [29]:
# Recovered per 100 confirmed cases over time.
plot_daywise_line(col = 'Recovered / 100 Cases', hue = rec)
In [30]:
# Number of countries in the data per day.
plot_daywise(col = 'No. of countries', hue = '#035aa6')
In [31]:
# Worldwide recovered vs. active cases, one line per series.
temp = day_wise.melt(
    id_vars = 'Date',
    value_vars = ['Recovered', 'Active'],
    var_name = 'Variable',
    value_name = 'Count',
)
# The figure is the cell's last expression, so it is displayed directly.
px.line(temp, x = 'Date', y = 'Count', color = 'Variable')
In [32]:
#Top N countries (the calls below use N = 15)

def plot_hbar(df, col, n, hover_data = None):
    """Show a horizontal bar chart of the `n` rows of `df` with the largest `col`.

    df         : frame with 'Country/Region', 'WHO Region' and `col` columns.
    col        : column to rank by; also the bar length, bar text and title.
    n          : number of rows kept (the tail after sorting by `col` ascending).
    hover_data : optional extra hover columns forwarded to px.bar. Defaults to
                 None rather than a mutable ``[]`` shared between calls.
    """
    top_rows = df.sort_values(col).tail(n)
    fig = px.bar(top_rows,
                x = col, y = 'Country/Region', color = 'WHO Region',
                text = col, orientation = 'h', width = 700, hover_data = hover_data,
                color_discrete_sequence = px.colors.qualitative.Dark2)
    # Coloring by region splits the bars into several traces; order the
    # y categories by total so the bars stay sorted by value.
    fig.update_layout(title = col, xaxis_title = '', yaxis_title = '',
                      yaxis_categoryorder = 'total ascending',
                      uniformtext_minsize = 8, uniformtext_mode = 'hide')
    fig.show()
In [33]:
def plot_hbar_wm(col, n, min_pop = 1000000, sort = 'descending'):
    """Show the `n` largest `col` values in `worldmeter_data` as horizontal bars.

    col     : column of `worldmeter_data` to rank by; also bar length and text.
    n       : number of countries to show.
    min_pop : only countries with 'Population' strictly greater than this are
              considered. The chart title reflects the value actually used.
    sort    : accepted for backward compatibility; currently not used.
    """
    def _pop_label(value):
        # Compact label for the cutoff, e.g. 1000000 -> '1M', 100000 -> '100K'.
        for size, suffix in ((1000000000, 'B'), (1000000, 'M'), (1000, 'K')):
            if value >= size:
                return '{:g}{}'.format(value / size, suffix)
        return '{:g}'.format(value)

    df = worldmeter_data[worldmeter_data['Population'] > min_pop]
    df = df.sort_values(col, ascending = True).tail(n)
    fig = px.bar(df,
                x = col, y = 'Country/Region', color = 'WHO Region',
                text = col, orientation = 'h', width = 700,
                color_discrete_sequence = px.colors.qualitative.Dark2)
    # The title used to hard-code "> 1M Pop" even when min_pop was different.
    fig.update_layout(title = col + ' (Only countries with > ' + _pop_label(min_pop) + ' Pop)',
                     xaxis_title = '', yaxis_title = '',
                     yaxis_categoryorder = 'total ascending',
                     uniformtext_minsize = 8, uniformtext_mode = 'hide')
    fig.show()
In [34]:
# 15 countries with the most confirmed cases.
plot_hbar(df = country_wise, col = 'Confirmed', n = 15)
In [35]:
# 15 countries with the most active cases.
plot_hbar(df = country_wise, col = 'Active', n = 15)
In [36]:
# 15 countries with the most new cases.
plot_hbar(df = country_wise, col = 'New cases', n = 15)
In [37]:
# 15 countries with the most deaths.
plot_hbar(df = country_wise, col = 'Deaths', n = 15)
In [38]:
# 15 countries with the most new deaths.
plot_hbar(df = country_wise, col = 'New deaths', n = 15)
In [39]:
# 15 countries with the highest deaths per 100 cases.
plot_hbar(df = country_wise, col = 'Deaths / 100 Cases', n = 15)
In [40]:
# 15 countries with the most recovered cases.
plot_hbar(df = country_wise, col = 'Recovered', n = 15)
In [41]:
# 15 countries with the most newly recovered cases.
plot_hbar(df = country_wise, col = 'New recovered', n = 15)
In [42]:
# 15 countries with the highest recovered per 100 cases.
plot_hbar(df = country_wise, col = 'Recovered / 100 Cases', n = 15)
In [43]:
# 15 countries with the largest one-week change in confirmed cases.
plot_hbar(df = country_wise, col = '1 week change', n = 15)
In [44]:
# 15 countries with the largest one-week percentage increase.
plot_hbar(df = country_wise, col = '1 week % increase', n = 15)
In [45]:
# 15 highest total cases per million; population cutoff is 100000 here.
plot_hbar_wm(col = 'Tot Cases/1M pop', n = 15, min_pop = 100000)
In [46]:
# 15 highest deaths per million among countries above the population cutoff.
plot_hbar_wm(col = 'Deaths/1M pop', n = 15, min_pop = 1000000)
In [47]:
# 15 highest total test counts among countries above the population cutoff.
plot_hbar_wm(col = 'TotalTests', n = 15, min_pop = 1000000)
In [48]:
# 15 highest tests per million, using the default population cutoff.
plot_hbar_wm(col = 'Tests/1M pop', n = 15)
In [50]:
#Data vs

def plot_stacked(col):
    """Show a bar chart of `full_grouped[col]` per date, colored by country.

    col : column of `full_grouped` to plot; also used as the chart title.
    """
    stacked = px.bar(
        full_grouped,
        x = 'Date',
        y = col,
        color = 'Country/Region',
        height = 600,
        title = col,
        color_discrete_sequence = px.colors.cyclical.mygbm,
    )
    stacked.update_layout(showlegend = True)
    stacked.show()
In [51]:
def plot_line(col):
    """Show a line chart of `full_grouped[col]` per date, one line per country.

    col : column of `full_grouped` to plot; also used as the chart title.
    """
    lines = px.line(
        full_grouped,
        x = 'Date',
        y = col,
        color = 'Country/Region',
        height = 600,
        title = col,
        color_discrete_sequence = px.colors.cyclical.mygbm,
    )
    lines.update_layout(showlegend = True)
    lines.show()
In [52]:
# Confirmed cases per day, split by country.
plot_stacked(col = 'Confirmed')
In [55]:
# Deaths per day, split by country.
plot_stacked(col = 'Deaths')
In [56]:
# New cases per day, split by country.
plot_stacked(col = 'New cases')
In [57]:
# Active cases per day, split by country.
plot_stacked(col = 'Active')
In [59]:
# Confirmed cases over time, one line per country.
plot_line(col = 'Confirmed')
In [60]:
# Deaths over time, one line per country.
plot_line(col = 'Deaths')
In [61]:
# New cases over time, one line per country.
plot_line(col = 'New cases')
In [62]:
# Active cases over time, one line per country.
plot_line(col = 'Active')
In [66]:
# Join each country's daily counts with the worldwide totals of the same day.
# After the merge, *_x columns are per-country values and *_y columns are the
# worldwide values from day_wise.
temp = pd.merge(full_grouped[['Date', 'Country/Region', 'Confirmed', 'Deaths']],
               day_wise[['Date', 'Confirmed', 'Deaths']], on = 'Date')
# Scale to percent first and round afterwards: rounding the ratio and then
# multiplying by 100 reintroduces float noise (e.g. 0.07 * 100 == 7.000000000000001).
temp['% Confirmed'] = (temp['Confirmed_x'] / temp['Confirmed_y'] * 100).round(1)
temp['% Deaths'] = (temp['Deaths_x'] / temp['Deaths_y'] * 100).round(1)
temp.head()
Out[66]:
Date Country/Region Confirmed_x Deaths_x Confirmed_y Deaths_y % Confirmed % Deaths
0 2020-01-22 Afghanistan 0 0 555 17 0.0 0.0
1 2020-01-22 Albania 0 0 555 17 0.0 0.0
2 2020-01-22 Algeria 0 0 555 17 0.0 0.0
3 2020-01-22 Andorra 0 0 555 17 0.0 0.0
4 2020-01-22 Angola 0 0 555 17 0.0 0.0
In [69]:
# Share of the worldwide confirmed cases contributed by each country, per day.
fig = px.bar(
    temp,
    x = 'Date',
    y = '% Confirmed',
    color = 'Country/Region',
    range_y = (0, 100),
    title = '% of Cases from each country',
    color_discrete_sequence = px.colors.qualitative.Prism,
)
fig.show()
In [73]:
# Share of the worldwide deaths contributed by each country, per day.
# The title previously said "% of Cases" (copied from the confirmed-cases chart).
fig = px.bar(temp, x = 'Date', y = '% Deaths', color = 'Country/Region',
            range_y = (0, 100), title = '% of Deaths from each country',
            color_discrete_sequence = px.colors.qualitative.Prism)
fig.show()
In [86]:
def gt_n(n):
    """Plot confirmed cases against days since each country first exceeded `n`.

    Only countries whose confirmed count in `full_grouped` exceeds `n` at some
    point are included. Day 0 for a country is the first date on which its
    total confirmed count (summed over provinces in `full_table`) is above `n`.

    n : confirmed-case threshold.
    """
    countries = full_grouped[full_grouped['Confirmed'] > n]['Country/Region'].unique()
    temp = full_table[full_table['Country/Region'].isin(countries)]
    # full_table is per province, so sum up to one row per country and date.
    temp = temp.groupby(['Country/Region', 'Date'])['Confirmed'].sum().reset_index()
    temp = temp[temp['Confirmed'] > n]

    # 'Date' is still a string here; min() relies on the ISO yyyy-mm-dd format
    # sorting chronologically.
    min_date = temp.groupby('Country/Region')['Date'].min().reset_index()
    min_date.columns = ['Country/Region', 'Min Date']

    from_nth_case = pd.merge(temp, min_date, on = 'Country/Region')
    from_nth_case['Date'] = pd.to_datetime(from_nth_case['Date'])
    from_nth_case['Min Date'] = pd.to_datetime(from_nth_case['Min Date'])
    from_nth_case['N days'] = (from_nth_case['Date'] - from_nth_case['Min Date']).dt.days

    # A space was missing after "from", giving titles like "N days from100000 case".
    fig = px.line(from_nth_case, x = 'N days', y = 'Confirmed', color = 'Country/Region',
                 title = 'N days from ' + str(n) + ' case', height = 600)
    fig.show()
In [87]:
# Growth curves for countries that exceeded 100000 confirmed cases.
gt_n(n = 100000)
In [90]:
# Deaths vs. confirmed cases for the 20 countries with the most deaths,
# on log-log axes with the marker size following the confirmed count.
fig = px.scatter(
    country_wise.sort_values('Deaths', ascending = False).head(20),
    x = 'Confirmed',
    y = 'Deaths',
    color = 'Country/Region',
    size = 'Confirmed',
    height = 700,
    text = 'Country/Region',
    log_x = True,
    log_y = True,
    title = 'Deaths vs Confirmed (Scale is in log10)',
)
fig.update_traces(textposition = 'top center')
# Points are labelled with the country name, so the legend is redundant.
fig.update_layout(showlegend = False, xaxis_rangeslider_visible = True)
fig.show()
In [91]:
#Composition of Cases

def plot_treemap(col):
    """Show a treemap of `country_wise`, one tile per country sized by `col`.

    col : column of `country_wise` used for tile sizes and as the title.
    """
    tiles = px.treemap(
        country_wise,
        path = ['Country/Region'],
        values = col,
        height = 700,
        title = col,
        color_discrete_sequence = px.colors.qualitative.Dark2,
    )
    # Show the label and the value inside each tile (the figure has a single trace).
    tiles.update_traces(textinfo = 'label+text+value')
    tiles.show()
In [92]:
# Composition of confirmed cases by country.
plot_treemap(col = 'Confirmed')
In [93]:
# Composition of deaths by country.
plot_treemap(col = 'Deaths')
In [94]:
def plot_bubble(col, pal):
    """Show a date-by-country bubble chart sized and colored by `col`.

    Only rows of `full_grouped` where `col` is positive are plotted.

    col : column of `full_grouped` used for both bubble size and color.
    pal : passed to px.scatter as `color_continuous_scale`.
    """
    positive = full_grouped[full_grouped[col] > 0]
    ordered = positive.sort_values('Country/Region', ascending = False)
    bubbles = px.scatter(
        ordered,
        x = 'Date',
        y = 'Country/Region',
        size = col,
        color = col,
        height = 3000,
        color_continuous_scale = pal,
    )
    # dtick = 1 puts a tick on every country; the color bar is hidden.
    bubbles.update_layout(yaxis = dict(dtick = 1), coloraxis_showscale = False)
    bubbles.show()
In [95]:
# New cases per country and date.
plot_bubble(col = 'New cases', pal = 'Viridis')
In [97]:
# Active cases per country and date.
plot_bubble(col = 'Active', pal = 'Viridis')
In [99]:
# Work on an explicit copy: adding a column to a slice of full_grouped
# triggers pandas' SettingWithCopyWarning (hidden here by the global
# warnings filter).
temp = full_grouped[['Date', 'Country/Region', 'New cases']].copy()
# 1 if the country reported a non-zero number of new cases that day, else 0.
temp['New cases reported ?'] = (temp['New cases'] != 0).astype(int)
In [100]:
# Heatmap of the 0/1 'New cases reported ?' flag per country (y) and date (x).
fig = go.Figure(
    data = go.Heatmap(
        x = temp['Date'],
        y = temp['Country/Region'],
        z = temp['New cases reported ?'],
        text = temp['New cases reported ?'],
        colorscale = 'Emrld',
        showlegend = False,
    )
)
# dtick = 1 puts a tick on every country; the figure is made tall to fit them.
fig.update_layout(yaxis = dict(dtick = 1), height = 3000)
fig.show()
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: